library(scales) # Parsed axis labels in figures (e.g., Greek letters)
library(knitr) # Neatly formatted tables (for output)
library(kableExtra) # More options for neatly formatting tables
library(jsonlite) # Converting JSON files to data frames
library(gridExtra) # Arranging multiple plots in a grid
library(tidyverse) # Efficient data manipulation and plotting
library(broom) # Tabular formatting of statistical model outputs
library(ggfortify) # Diagnostic plots for linear statistical models
# Load server ip addresses from a designated config file, arranged in
# a table. The table will have three columns: `server` (whether we
# are storing the ip of Server 1, Server 2, etc.), `platform` (Swarm,
# IPFS, or Arweave), and `ip` (the actual ip address).
serversFromConfig <- function(configFile = "../config.json") {
# Read JSON data from file and convert to data frame:
fromJSON(configFile) |>
# Convert data frame to a tibble:
as_tibble() |>
# Choose only the three columns pertaining to the servers:
select(contains("dl")) |>
# Label them as "Server 1", "Server 2", ...:
mutate(server = str_c("Server ", 1:3), .before = 1) |>
# Simplify column names by dropping the "_dl_servers" suffix:
rename_with(\(x) str_remove(x, "_dl_servers"), !server) |>
# Tidy the data:
pivot_longer(!server, names_to = "platform", values_to = "ip") |>
# Change storage platform names to reflect proper capitalization:
mutate(platform = case_match(
platform,
"swarm" ~ "Swarm",
"ipfs" ~ "IPFS",
"arw" ~ "Arweave"
))
}
# Load result data of the benchmarking experiment from a JSON file,
# and arrange them in a rectangular table:
dataFromJsonRaw <- function(jsonFile = "../results.json") {
# Read JSON data file:
fromJSON(jsonFile) |>
# Convert to a tibble:
as_tibble() |>
# Unpack the nested `tests` column:
unnest(tests) |>
# And then the sub-nested `results` column:
unnest(results) |>
# Give new names to some of the columns:
rename(time_sec = download_time_seconds,
replicate = ref,
platform = storage)
}
# Take the raw data generated by dataFromJsonRaw(), and tidy it up:
dataFromJson <- function(rawTable) {
# Start from the tibble generated by dataFromJsonRaw():
rawTable |>
# Convert the JSON true/false into R's native TRUE and FALSE:
mutate(sha256_match = (sha256_match == "true")) |>
# File size is a character string; convert to integer:
mutate(size_kb = as.integer(size)) |>
# Remove unnecessary columns:
select(!size & !server & !timestamp) |>
# Properly capitalize IPFS in the `platform` column - important
# for matching with the server ip data from serversFromConfig():
mutate(platform = ifelse(platform == "Ipfs","IPFS",platform)) |>
# Now join table with server ip info, so we'll know which ip
# is Server 1, which is Server 2, etc.:
left_join(serversFromConfig(), by = join_by(platform, ip)) |>
# Rearrange the order of the columns:
relocate(size_kb, server, time_sec, attempts, sha256_match,
.after = platform)
}Analysis of first run of the benchmarking experiment
Loading and tidying the data
We first set up some functions to load and tidy the raw data:
After loading and tidying the data, here’s what the first few rows of the table look like:
dat <- dataFromJson(dataFromJsonRaw())
dat |>
head(n = 10) |>
kable()| platform | size_kb | server | time_sec | attempts | sha256_match | ip | latitude | longitude | replicate |
|---|---|---|---|---|---|---|---|---|---|
| Swarm | 1 | Server 1 | 0.0000000 | 15 | FALSE | 5.9.50.180:8080 | 50.4779 | 12.3713 | 8390191395cd33a3c7f3a63824d484d6f5666766516068daffc81aa1ab583c27 |
| Swarm | 1 | Server 2 | 0.1379130 | 1 | TRUE | 188.245.154.61:1633 | 49.4542 | 11.0775 | 8390191395cd33a3c7f3a63824d484d6f5666766516068daffc81aa1ab583c27 |
| Swarm | 1 | Server 3 | 0.1654890 | 1 | TRUE | 188.245.177.151:1633 | 49.4542 | 11.0775 | 8390191395cd33a3c7f3a63824d484d6f5666766516068daffc81aa1ab583c27 |
| Swarm | 1 | Server 1 | 0.0000000 | 15 | FALSE | 5.9.50.180:8080 | 50.4779 | 12.3713 | 6e0c819f68bbf512dbcb4a5d2d696e5347b8dafab7e97df7223db0ada69344d7 |
| Swarm | 1 | Server 2 | 0.3394287 | 1 | TRUE | 188.245.154.61:1633 | 49.4542 | 11.0775 | 6e0c819f68bbf512dbcb4a5d2d696e5347b8dafab7e97df7223db0ada69344d7 |
| Swarm | 1 | Server 3 | 0.1937695 | 1 | TRUE | 188.245.177.151:1633 | 49.4542 | 11.0775 | 6e0c819f68bbf512dbcb4a5d2d696e5347b8dafab7e97df7223db0ada69344d7 |
| Swarm | 1 | Server 1 | 0.0000000 | 15 | FALSE | 5.9.50.180:8080 | 50.4779 | 12.3713 | 36e1e9345d559b6affece4568949f6ff2e6beb3b1db80ae5ebbcf3a74f0c5e56 |
| Swarm | 1 | Server 2 | 0.3230913 | 1 | TRUE | 188.245.154.61:1633 | 49.4542 | 11.0775 | 36e1e9345d559b6affece4568949f6ff2e6beb3b1db80ae5ebbcf3a74f0c5e56 |
| Swarm | 1 | Server 3 | 0.3515806 | 1 | TRUE | 188.245.177.151:1633 | 49.4542 | 11.0775 | 36e1e9345d559b6affece4568949f6ff2e6beb3b1db80ae5ebbcf3a74f0c5e56 |
| Swarm | 1 | Server 1 | 0.0000000 | 15 | FALSE | 5.9.50.180:8080 | 50.4779 | 12.3713 | 7049fe5e08fd855c3b89788a317f51bf844c20ecf3c5f71f863d8a7c9ed2af0d |
We can do some sanity checks. First of all, the experiment is well balanced, with 30 replicates per size, server, and platform:
dat |>
count(size_kb, server, platform, name = "number of replicates") |>
count(`number of replicates`,
name = "size-server-platform combinations") |>
kable()| number of replicates | size-server-platform combinations |
|---|---|
| 30 | 45 |
And the replicates are also correctly assigned:
dat |>
count(server, replicate, name = "number of replicates") |>
count(`number of replicates`,
name = "server-replicate combinations") |>
kable()| number of replicates | server-replicate combinations |
|---|---|
| 1 | 1350 |
Let us check if any of the sha256 matches failed:
dat |>
count(sha256_match) |>
kable()| sha256_match | n |
|---|---|
| FALSE | 150 |
| TRUE | 1200 |
Indeed, there are 150 failures. Let us check where those failed attempts are:
dat |>
filter(!sha256_match) |>
count(platform, size_kb, server) |>
kable()| platform | size_kb | server | n |
|---|---|---|---|
| Swarm | 1 | Server 1 | 30 |
| Swarm | 10 | Server 1 | 30 |
| Swarm | 100 | Server 1 | 30 |
| Swarm | 1000 | Server 1 | 30 |
| Swarm | 10000 | Server 1 | 30 |
In short, all Swarm downloads on Server 1 have failed, and nothing else.
Those same failed downloads also always had 15 download attempts. All other downloads succeeded in a single attempt:
dat |>
count(platform, attempts, server) |>
pivot_wider(names_from = platform, values_from = attempts) |>
relocate(Swarm, IPFS, Arweave, .after = n) |>
kable()| server | n | Swarm | IPFS | Arweave |
|---|---|---|---|---|
| Server 1 | 150 | 15 | 1 | 1 |
| Server 2 | 150 | 1 | 1 | 1 |
| Server 3 | 150 | 1 | 1 | 1 |
So everything in the data look OK at first glance except for the (Swarm, Server 1) combination.
Preliminary analysis
Plotting the raw results, we get:
dat |>
# Keep only those rows with a sha256 match:
filter(sha256_match) |>
# Choose only the relevant columns:
select(platform | size_kb | server | time_sec) |>
# Make sure that in plots, the order of storage platforms
# aligns with their median download times (so the order of the
# legend labels matches the order of appearance in the plot):
mutate(platform = fct_reorder(platform, time_sec)) |>
# Change file sizes to human-readable labels:
mutate(size = case_when(
size_kb == 1 ~ "1 KB",
size_kb == 10 ~ "10 KB",
size_kb == 100 ~ "100 KB",
size_kb == 1000 ~ "1 MB",
size_kb == 10000 ~ "10 MB"
)) |>
# Make sure file size labels are in the correct order:
mutate(size = fct_reorder(size, size_kb)) |>
# Create plot:
ggplot(aes(x = time_sec, color = platform, fill = platform)) +
geom_density(alpha = 0.2, bw = 0.05) +
scale_x_log10() +
labs(x = "Download time (seconds)", y = "Density",
color = "Platform: ", fill = "Platform: ") +
scale_color_manual(
values = c("steelblue", "goldenrod", "forestgreen")
) +
scale_fill_manual(
values = c("steelblue", "goldenrod", "forestgreen")
) +
facet_grid(server ~ size, scales = "fixed") +
theme_bw() +
theme(legend.position = "bottom", panel.grid = element_blank())Here we have retrieval times (on the log scale) along the x-axis and density of incidence along the y-axis. The curves are higher where there are more data. Colors represent the different storage platforms; facet rows are the different servers used, and facet columns are the various data sizes.
At a glance, we see that IPFS is the fastest. For small files, Swarm is faster than Arweave. For 10MB files, it is a bit slower but still comparable. Somewhat strangely, the Swarm distributions look bimodal, even on Server 2 and Server 3 where the downloads succeeded. This should probably be investigated further.
Now we check the relationship between file size and download times, for each unique platform-server combination (removing the faulty (Swarm, Server 1) data, of course):
# Create a new column, `plat_serv`, which contains the concatenated
# platform and server names, and with the correct ordering:
mergePlatformServer <- function(dat) {
dat |>
# Make sure platform-server combinations can be properly sorted:
mutate(platform = fct_relevel(platform,
"Swarm", "IPFS", "Arweave")) |>
arrange(platform, server, size_kb) |>
# Merge platform-server combinations, for plotting purposes:
mutate(plat_serv = as_factor(str_c(platform, ", ", server)))
}
# Create plot to study the relationship between file size
# and download times:
plotPlatformServerFit <- function(dat, x, y, formula = y ~ x,
method = lm, log_y = TRUE) {
ggplot(dat, aes(x = {{x}}, y = {{y}})) +
geom_point(color = "steelblue", alpha = 0.5) +
# Fit smoothing lines with prescribed method and model formula:
geom_smooth(color = "goldenrod", fill = "goldenrod",
method = method, formula = formula) +
scale_x_log10() +
# If log_y = TRUE, put y-axis on the log scale, otherwise don't:
{ if (log_y) scale_y_log10() else scale_y_continuous() } +
labs(x = "File size (KB)", y = "Download time (seconds)") +
facet_wrap(~ plat_serv, scales = "free_y") +
theme_bw()
}
dat |>
mergePlatformServer() |>
# Remove faulty data points, replacing download times of 0 with NA:
mutate(time_sec = ifelse(!sha256_match, NA, time_sec)) |>
plotPlatformServerFit(size_kb, time_sec, log_y = FALSE)These data have a clear increasing trend. They are also manifestly nonlinear, so fitting linear functions is unlikely to do well. Instead, let us try to fix this by putting download times on the log scale as well:
dat |>
mergePlatformServer() |>
# Remove faulty data points, replacing download times of 0 with NA:
mutate(time_sec = ifelse(!sha256_match, NA, time_sec)) |>
plotPlatformServerFit(size_kb, time_sec, log_y = TRUE)This is a lot better, although arguably the relationships are still somewhat nonlinear. That said, let us analyze this pattern further by performing a linear regression for each platform-server combination:
regressionDat <- dat |>
# Keep only those rows with a sha256 match:
filter(sha256_match) |>
# Create columns x (predictor; here log10 file size)
# and y (response; here log10 download time):
mutate(x = log10(size_kb), y = log10(time_sec)) |>
# Keep relevant columns only:
select(platform | server | x | y) |>
# Package up data into sub-tables for each unique
# platform-server combination:
nest(data = x | y) |>
# For each, perform linear model fit:
mutate(fit = map(data, \(dat) lm(y ~ x, data = dat))) |>
# For each model fit, extract and organize regression tables:
mutate(regtab = map(fit, broom::tidy)) |>
# Unpackage those regression tables:
unnest(regtab)Then we can inspect the regression statistics both for the intercepts and the slopes:
regressionDat |>
# Drop nested columns that we don't need here:
select(!data & !fit) |>
# Change fitted parameter names to be more human-readable:
mutate(term = ifelse(term == "(Intercept)","intercept","slope")) |>
# Sort rows, first showing all intercepts, then all slopes:
arrange(term, platform, server) |>
kable()| platform | server | term | estimate | std.error | statistic | p.value |
|---|---|---|---|---|---|---|
| Arweave | Server 1 | intercept | 0.0125603 | 0.0105703 | 1.188266 | 0.236632 |
| Arweave | Server 2 | intercept | 0.1716536 | 0.0056452 | 30.406813 | 0.000000 |
| Arweave | Server 3 | intercept | 0.2216665 | 0.0122245 | 18.133022 | 0.000000 |
| IPFS | Server 1 | intercept | -0.7221360 | 0.0308886 | -23.378736 | 0.000000 |
| IPFS | Server 2 | intercept | -1.0486442 | 0.0318166 | -32.959038 | 0.000000 |
| IPFS | Server 3 | intercept | -1.2726393 | 0.0126825 | -100.346280 | 0.000000 |
| Swarm | Server 2 | intercept | -0.6917008 | 0.0433090 | -15.971303 | 0.000000 |
| Swarm | Server 3 | intercept | -0.7425320 | 0.0470355 | -15.786638 | 0.000000 |
| Arweave | Server 1 | slope | 0.0972232 | 0.0043153 | 22.529853 | 0.000000 |
| Arweave | Server 2 | slope | 0.0546925 | 0.0023047 | 23.731274 | 0.000000 |
| Arweave | Server 3 | slope | 0.0700226 | 0.0049906 | 14.030858 | 0.000000 |
| IPFS | Server 1 | slope | 0.2271200 | 0.0126102 | 18.010803 | 0.000000 |
| IPFS | Server 2 | slope | 0.1642424 | 0.0129891 | 12.644666 | 0.000000 |
| IPFS | Server 3 | slope | 0.2057088 | 0.0051776 | 39.730539 | 0.000000 |
| Swarm | Server 2 | slope | 0.3540662 | 0.0176808 | 20.025443 | 0.000000 |
| Swarm | Server 3 | slope | 0.3630591 | 0.0192022 | 18.907212 | 0.000000 |
All parameters are significantly different from zero except for the intercept of Arweave on Server 1. To check how well the assumptions behind linear regression are fulfilled (and thus how much one can trust these results), we make diagnostic plots:
regressionDat |>
# Data are replicated across intercept and slope; drop the former:
filter(term != "(Intercept)") |>
# Order platforms as [Swarm, IPFS, Arweave]:
mutate(platform = fct_relevel(platform,
"Swarm", "IPFS", "Arweave")) |>
arrange(platform, server) |>
# Create a diagnostic plot for each regression:
mutate(diagnostics = map(fit, \(x) {
autoplot(x, smooth.colour = NA, alpha = 0.3,
colour = "steelblue") +
theme_bw()
} )) |>
# Embellish them with titles showing platform and server:
mutate(diagnostics = pmap(
list(diagnostics, platform, server),
\(dia, sto, se) {
grid.arrange(grobs = dia@plots, top = str_c(sto, ", ", se))
}
)) |>
suppressMessages() |>
capture.output() |>
# We only care about the plots, not the final tibble; suppress it:
invisible()Most of these diagnostics look acceptable, although the residuals clearly do depend on fitted values quite often. This is the case for both Server 2 and Server 3 on Swarm. The same is true for IPFS Server 2 and Arweave Server 1. In both cases, there are additionally large outliers distorting the results. In the other cases, the diagnostics look fine.
Building a predictive model
The above linear model convincingly establishes a positive relationship between log file size and log download times that is not simply due to chance. But it might not be the best model for prediction, because the relationship is manifestly nonlinear, yet the fitted curve was is linear function.
One possible improvement is to try fitting a quadratic or cubic curve. In fact, a suite of models will be fit below, and we will employ model selection to try and choose the best. The models are:
- Quadratic (
quad), with the formula \(y_i = \beta_0 + \beta_1 x_i + \beta_2 x_i^2 + \varepsilon_i\). Here \(y_i\) is the \(i\)th log download time (all logarithms are base 10), \(x_i\) is the \(i\)th log file size, the \(\beta_i\) are the regression coefficients, and \(\varepsilon_i\) is the residual variation. - Quadratic, but without the linear term (
quadsimp). It is the same asquadbut with \(\beta_1\) set to zero: \(y_i = \beta_0 + \beta_1 x_i^2 + \varepsilon_i\). This model is therefore less flexible, but also has fewer parameters which reduces the likelihood of overfitting. - Exponential (
exp), with \(y_i = \beta_0 + \beta_1 \exp(x_i) + \varepsilon_i\)
Visually, all these models fit the data reasonably:
# Helper function for displaying data with various model fits:
plotWithFormula <- function(dat, formula = y ~ x) {
dat |>
mergePlatformServer() |>
mutate(time_sec = ifelse(!sha256_match, NA, time_sec)) |>
plotPlatformServerFit(size_kb, time_sec, formula = formula)
}
plotWithFormula(dat, y ~ x + I(x^2))plotWithFormula(dat, y ~ I(x^2))plotWithFormula(dat, y ~ exp(x))Let us compare these models. We will fit them all and extract relevant regression statistics, then compare their AIC scores to perform model selection:
modelComparison <- dat |>
# Order platforms as [Swarm, IPFS, Arweave]:
mutate(platform = fct_relevel(platform,
"Swarm", "IPFS", "Arweave")) |>
arrange(platform, server, size_kb) |>
# Keep only those rows with a sha256 match:
filter(sha256_match) |>
# Create columns x (predictor; here log10 file size)
# and y (response; here log10 download time):
mutate(x = log10(size_kb), y = log10(time_sec)) |>
# Choose relevant columns only:
select(platform, server, x, y) |>
# Combine with possible models in a fully-factorial way:
crossing(formula = list(
"linear" = formula(y ~ x),
"quad" = formula(y ~ x + I(x^2)),
"quadsimp" = formula(y ~ I(x^2)),
"exp" = formula(y ~ exp(x))
)) |>
# Extract the model names in a separate column:
mutate(model = names(formula)) |>
# Package up the data to be modeled in sub-tables:
nest(data = x | y) |>
# Fit the models:
mutate(fit = map2(formula, data, lm)) |>
# Extract both regression tables and quality-of-fit indicators:
mutate(regression = map(fit, tidy),
quality = map(fit, glance)) We work with AIC because it is usually favored when prediction is the goal (as opposed to inference, for which BIC is more appropriate). Here is a table with the AIC scores:
modelComparison |>
unnest(quality) |>
select(platform | server | model | AIC) |>
pivot_wider(names_from = model, values_from = AIC) |>
kable()| platform | server | linear | quad | quadsimp | exp |
|---|---|---|---|---|---|
| Swarm | Server 2 | 74.65295 | -24.45972 | -14.61966 | -18.30025 |
| Swarm | Server 3 | 99.41558 | 51.70865 | 50.68415 | 66.33704 |
| IPFS | Server 1 | -26.73906 | -108.95065 | -99.92618 | -98.67160 |
| IPFS | Server 2 | -17.85868 | -135.50458 | -88.71638 | -100.76498 |
| IPFS | Server 3 | -293.78867 | -410.59626 | -397.74715 | -240.91414 |
| Arweave | Server 1 | -348.44062 | -474.17289 | -462.64087 | -487.35987 |
| Arweave | Server 2 | -536.61144 | -569.06277 | -568.41676 | -508.01725 |
| Arweave | Server 3 | -304.82322 | -317.63970 | -319.45885 | -302.36202 |
Finding the best models, based on AIC:
modelComparison |>
unnest(quality) |>
select(platform | server | model | AIC) |>
filter(AIC == min(AIC), .by = c(platform, server)) |>
kable()| platform | server | model | AIC |
|---|---|---|---|
| Swarm | Server 2 | quad | -24.45972 |
| Swarm | Server 3 | quadsimp | 50.68415 |
| IPFS | Server 1 | quad | -108.95065 |
| IPFS | Server 2 | quad | -135.50458 |
| IPFS | Server 3 | quad | -410.59626 |
| Arweave | Server 1 | exp | -487.35987 |
| Arweave | Server 2 | quad | -569.06277 |
| Arweave | Server 3 | quadsimp | -319.45885 |
The best model is quad one in most cases. Even in the cases when it isn’t, its AIC score is not far above that of the best model. This suggests adopting this model for prediction across the board. Later on we’ll also look at the predictions made by quadsimp; as we will see, it makes almost the same ones as quad.
Let us check the diagnostic plots for the quadratic model quad:
modelComparison |>
# Only keep the quadratic model ("quad"):
filter(model == "quad") |>
# Create a diagnostic plot for each regression:
mutate(diagnostics = map(fit, \(x) {
autoplot(x, smooth.colour = NA, alpha = 0.3,
colour = "steelblue") +
theme_bw()
} )) |>
# Embellish them with titles showing platform and server:
mutate(diagnostics = pmap(
list(diagnostics, platform, server),
\(dia, pf, sv, m) {
grid.arrange(grobs = dia@plots,
top = str_c(pf, ", ", sv, ", quadratic model"))
}
)) |>
suppressMessages() |>
capture.output() |>
# We only care about the plots, not the final tibble; suppress it:
invisible()Not always amazing, especially the quantile-quantile plot for IPFS, Server 1. That said, here are the regression results:
modelComparison |>
# Only keep the quadratic model ("quad"):
filter(model == "quad") |>
# Unpack the regression tables for each model fit:
unnest(regression) |>
# Keep only relevant columns:
select(platform, server, term, estimate, std.error,
statistic, p.value) |>
# Change term labels to human-readable ones:
mutate(term = case_match(
term,
"(Intercept)" ~ "beta_0",
"x" ~ "beta_1",
"I(x^2)" ~ "beta_2"
)) |>
kable()| platform | server | term | estimate | std.error | statistic | p.value |
|---|---|---|---|---|---|---|
| Swarm | Server 2 | beta_0 | -0.4370973 | 0.0376917 | -11.5966470 | 0.0000000 |
| Swarm | Server 2 | beta_1 | -0.1551407 | 0.0446488 | -3.4746920 | 0.0006724 |
| Swarm | Server 2 | beta_2 | 0.1273017 | 0.0107037 | 11.8932184 | 0.0000000 |
| Swarm | Server 3 | beta_0 | -0.5328200 | 0.0485860 | -10.9665446 | 0.0000000 |
| Swarm | Server 3 | beta_1 | -0.0563647 | 0.0575539 | -0.9793382 | 0.3290219 |
| Swarm | Server 3 | beta_2 | 0.1048560 | 0.0137975 | 7.5996436 | 0.0000000 |
| IPFS | Server 1 | beta_0 | -0.5521749 | 0.0284402 | -19.4153010 | 0.0000000 |
| IPFS | Server 1 | beta_1 | -0.1128020 | 0.0336896 | -3.3482709 | 0.0010330 |
| IPFS | Server 1 | beta_2 | 0.0849805 | 0.0080765 | 10.5219832 | 0.0000000 |
| IPFS | Server 2 | beta_0 | -0.8506275 | 0.0260311 | -32.6774074 | 0.0000000 |
| IPFS | Server 2 | beta_1 | -0.2317909 | 0.0308358 | -7.5169356 | 0.0000000 |
| IPFS | Server 2 | beta_2 | 0.0990083 | 0.0073923 | 13.3933988 | 0.0000000 |
| IPFS | Server 3 | beta_0 | -1.1938889 | 0.0104053 | -114.7382135 | 0.0000000 |
| IPFS | Server 3 | beta_1 | 0.0482081 | 0.0123259 | 3.9111100 | 0.0001399 |
| IPFS | Server 3 | beta_2 | 0.0393752 | 0.0029549 | 13.3253196 | 0.0000000 |
| Arweave | Server 1 | beta_0 | 0.0797464 | 0.0084182 | 9.4730916 | 0.0000000 |
| Arweave | Server 1 | beta_1 | -0.0371489 | 0.0099720 | -3.7253139 | 0.0002775 |
| Arweave | Server 1 | beta_2 | 0.0335930 | 0.0023906 | 14.0520845 | 0.0000000 |
| Arweave | Server 2 | beta_0 | 0.1931221 | 0.0061355 | 31.4760261 | 0.0000000 |
| Arweave | Server 2 | beta_1 | 0.0117555 | 0.0072680 | 1.6174336 | 0.1079286 |
| Arweave | Server 2 | beta_2 | 0.0107342 | 0.0017424 | 6.1606966 | 0.0000000 |
| Arweave | Server 3 | beta_0 | 0.2531398 | 0.0141848 | 17.8458048 | 0.0000000 |
| Arweave | Server 3 | beta_1 | 0.0070761 | 0.0168030 | 0.4211214 | 0.6742815 |
| Arweave | Server 3 | beta_2 | 0.0157366 | 0.0040282 | 3.9065947 | 0.0001423 |
Plotting the estimates, to see if they are consistent across servers:
modelComparison |>
# Only keep the quadratic model ("quad"):
filter(model == "quad") |>
# Order platforms as [Swarm, IPFS, Arweave]:
mutate(platform = fct_relevel(platform,
"Swarm", "IPFS", "Arweave")) |>
# Unpack the regression tables for each model fit:
unnest(regression) |>
# Keep only relevant columns:
select(platform, server, term, estimate) |>
# Change term labels to form that can be parsed:
mutate(term = case_match(
term,
"(Intercept)" ~ "beta[0]",
"x" ~ "beta[1]",
"I(x^2)" ~ "beta[2]"
)) |>
# Create plot:
ggplot(aes(x = term, y = estimate)) +
geom_point(color = "steelblue", alpha = 0.7) +
scale_x_discrete(labels = parse_format()) +
facet_grid(~ platform) +
theme_bw()While there is some spread, maybe it is not a great mistake to take the average of the parameters and treat them as “the” parameter for one platform. Let us compute these averages:
paramTab <- modelComparison |>
# Only keep the quadratic model ("quad"):
filter(model == "quad") |>
# Unpack the regression tables for each model fit:
unnest(regression) |>
# Keep only relevant columns:
select(platform, server, term, estimate) |>
# Compute mean parameters across servers, for each platform:
summarize(estimate = mean(estimate), .by = c(platform, term)) |>
# Change term labels to human-readable form:
mutate(term = case_match(
term,
"(Intercept)" ~ "b0",
"x" ~ "b1",
"I(x^2)" ~ "b2"
)) |>
# Make table easier to read:
pivot_wider(names_from = term, values_from = estimate)
paramTab |>
# Convert to Latex-compatible names, for parsing:
rename(`$\\beta_0$` = b0, `$\\beta_1$` = b1, `$\\beta_2$` = b2) |>
kable(escape = FALSE, format = "latex") |>
kable_styling(latex_options = "HOLD_position")So we have the following models:
| Swarm | \(y_i = -0.485 - 0.106 x_i + 0.116 x_i^2\) |
| IPFS | \(y_i = -0.866 - 0.0988 x_i + 0.0745 x_i^2\) |
| Arweave | \(y_i = 0.175 - 0.00611 x_i + 0.02 x_i^2\) |
This predicts that for small file sizes, IPFS is best (smallest \(\beta_0\)), and Swarm is in the middle. In turn, for very large files, Arweave will be best (smallest \(\beta_2\)) and IPFS second best. To see where the curves take over one another, we can plot all three of them:
# Compute log10(download time) from:
# - s: log10(file size in KB)
# - b0, b1, b2: regression coefficients
qfun <- function(s, b0, b1, b2) {
b0 + b1*s + b2*s^2
}
paramTab |>
mutate(platform = as_factor(platform)) |>
# Compute curves for each platform, and evaluate at various
# file size values, then store in a sub-table:
mutate(curve = pmap(list(b0, b1, b2), \(b0, b1, b2) {
tibble(x = seq(0, 7, l = 101)) |>
mutate(y = qfun(x, b0, b1, b2))
} )) |>
# Unpack nested tables:
unnest(curve) |>
# Switch to linear scale for plotting; will be changed back below:
mutate(x = 10^x, y = 10^y) |>
# Create plot:
ggplot(aes(x = x, y = y, color = platform)) +
geom_line(linewidth = 1) +
labs(x = "File size", y = "Predicted download time", color=NULL) +
scale_x_log10(
breaks = 10^c(0, 3, 6), labels = c("1KB", "1MB", "1GB")
) +
scale_y_log10(
breaks = c(1, 60, 3600), labels = c("1s", "1m", "1h")
) +
scale_color_manual(
values = c("steelblue", "goldenrod", "forestgreen")
) +
theme_bw()Incidentally: as advertised above, we can create the same plot using the quadsimp model instead. The predictions do not change appreciably:
modelComparison |>
filter(model == "quadsimp") |>
unnest(regression) |>
select(platform, server, term, estimate) |>
summarize(estimate = mean(estimate), .by = c(platform, term)) |>
mutate(term = case_match(
term,
"(Intercept)" ~ "b0",
"I(x^2)" ~ "b1"
)) |>
pivot_wider(names_from = term, values_from = estimate) |>
mutate(platform = as_factor(platform)) |>
mutate(curve = pmap(list(b0, b1), \(b0, b1) {
tibble(x = seq(0, 7, l = 101)) |>
mutate(y = qfun(x, b0, 0, b1))
} )) |>
unnest(curve) |>
mutate(x = 10^x, y = 10^y) |>
ggplot(aes(x = x, y = y, color = platform)) +
geom_line(linewidth = 1) +
labs(x = "File size", y = "Predicted download time", color=NULL) +
scale_x_log10(
breaks = 10^c(0, 3, 6), labels = c("1KB", "1MB", "1GB")
) +
scale_y_log10(
breaks = c(1, 60, 3600), labels = c("1s", "1m", "1h")
) +
scale_color_manual(
values = c("steelblue", "goldenrod", "forestgreen")
) +
theme_bw()Plotting the two model predictions together, to show how close they are:
modelComparison |>
filter(model %in% c("quad", "quadsimp")) |>
unnest(regression) |>
select(platform, server, model, term, estimate) |>
summarize(estimate = mean(estimate),
.by = c(platform, term, model)) |>
mutate(term = case_match(
term,
"(Intercept)" ~ "b0",
"x" ~ "b1",
"I(x^2)" ~ "b2"
)) |>
pivot_wider(names_from = term, values_from = estimate,
values_fill = 0) |>
mutate(platform = as_factor(platform)) |>
mutate(curve = pmap(list(b0, b1, b2), \(b0, b1, b2) {
tibble(x = seq(0, 7, l = 101)) |>
mutate(y = qfun(x, b0, b1, b2))
} )) |>
unnest(curve) |>
mutate(x = 10^x, y = 10^y) |>
ggplot(aes(x = x, y = y, color = platform, linetype = model)) +
geom_line(linewidth = 1) +
labs(x = "File size", y = "Predicted download time",
color = "Platform", linetype = "Model") +
scale_x_log10(
breaks = 10^c(0, 3, 6), labels = c("1KB", "1MB", "1GB")
) +
scale_y_log10(
breaks = c(1, 60, 3600), labels = c("1s", "1m", "1h")
) +
scale_color_manual(
values = c("steelblue", "goldenrod", "forestgreen")
) +
theme_bw()Finally, a quick note: it is unlikely that the model is realistic for very large files. For instance, we see that Swarm takes between about 10 minutes (based on quadsimp) and 20 minutes (based on quad) to download 1GB of data. But it is unlikely that downloading 100GB would take 2.44 days (!), as the quadsimp model would predict.